import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
# Add the directory passed as the first command-line argument to the import
# search path. The same argument is used further down as the folder that
# contains credit_customers.csv. Running the script without an argument
# raises IndexError here.
sys.path.append(sys.argv[1])

import pandas as pd   
import matplotlib.pyplot as plt  

   
# Load the customer table from the directory given as the first CLI argument.
credit_customers = pd.read_csv(os.path.join(sys.argv[1], 'credit_customers.csv'))

# Draw a 2x2 overview figure: bar charts of the category counts on the top
# row, 20-bin histograms of the numeric columns on the bottom row.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

credit_customers['credit_history'].value_counts().plot(kind='bar', ax=axes[0, 0], title='Credit History')
credit_customers['employment'].value_counts().plot(kind='bar', ax=axes[0, 1], title='Employment')
credit_customers['age'].plot(kind='hist', bins=20, ax=axes[1, 0], title='Age')
credit_customers['credit_amount'].plot(kind='hist', bins=20, ax=axes[1, 1], title='Credit Amount')

plt.tight_layout()

# savefig does not create missing parent directories, so make sure the
# output folder exists before writing the figure.
os.makedirs('ref_result', exist_ok=True)
plt.savefig('ref_result/subplots.png')
# plt.show()

  

import pandas as pd   
import pickle
   
# Share of 'good' outcomes within each credit_history category.
# value_counts(normalize=True) yields per-group proportions; unstack() turns
# the class labels into columns, and fillna(0) covers groups in which a
# class never occurs.
approval_rates_credit_history = (
    credit_customers
    .groupby('credit_history')['class']
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)['good']
)

print(approval_rates_credit_history)
# pickle.dump(approval_rates_credit_history,open("./ref_result/approval_rates_credit_history.pkl","wb"))

  

import pandas as pd   
import pickle
   
# Share of 'good' outcomes within each employment category.
# value_counts(normalize=True) yields per-group proportions; unstack() turns
# the class labels into columns, and fillna(0) covers groups in which a
# class never occurs.
approval_rates_employment = (
    credit_customers
    .groupby('employment')['class']
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)['good']
)

print(approval_rates_employment)
# pickle.dump(approval_rates_employment,open("./ref_result/approval_rates_employment.pkl","wb"))

  

import pandas as pd   
import pickle
   
# Share of 'good' outcomes for each individual age strictly between 40 and 59.
# NOTE(review): both bounds are exclusive, so ages 40 and 59 themselves are
# left out — confirm that this matches the intended "middle age" band.
approval_rates_middle_age = (
    credit_customers
    .loc[lambda df: (df['age'] > 40) & (df['age'] < 59)]
    .groupby('age')['class']
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)['good']
)

print(approval_rates_middle_age)
# pickle.dump(approval_rates_middle_age,open("./ref_result/approval_rates_middle_age.pkl","wb"))

  

import pandas as pd   
import pickle
   
# Bucket credit amounts into 2000-wide bins spanning 0..20000 and store the
# bin on the frame as a new 'credit_amount_range' column. include_lowest=True
# makes the first bin contain its left edge (0).
# NOTE(review): pd.cut maps values outside the bin edges to NaN, so any
# amount above 20000 would be left out of the rates below — confirm that the
# data stays within range.
credit_customers['credit_amount_range'] = pd.cut(
    credit_customers['credit_amount'],
    bins=list(range(0, 20001, 2000)),
    include_lowest=True,
)

# Share of 'good' outcomes within each credit-amount bin.
# The grouping key is categorical (it comes from pd.cut). observed=False is
# pinned explicitly: it is the historical default, and leaving it implicit
# triggers a FutureWarning on recent pandas and would change the result once
# the default is switched.
approval_rates_credit_amount = (
    credit_customers
    .groupby('credit_amount_range', observed=False)['class']
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)['good']
)

print(approval_rates_credit_amount)
# pickle.dump(approval_rates_credit_amount,open("./ref_result/approval_rates_credit_amount.pkl","wb"))

  

